
// Load the base data and restrict the sample
//----------------------------------------------------------------------------

clear
cd "D:\data_replication"

use estimation\1_data_format\data_base.dta, clear

// Keep observations before 2006 reported by declarant code 5
// (presumably Italy, going by the output file names -- confirm against the codebook)
keep if year < 2006 & declarant == 5

// Domestic share per product and starting value of the product index
//----------------------------------------------------------------------------
// The domestic share is read from the rows in which the declarant reports on
// itself (declarant == partner). It is averaged within product_id, after which
// the data are reduced to one row per product_id.

gen dom_share_temp = exp_share if declarant == partner
bysort product_id: egen dom_share = mean(dom_share_temp)
by product_id: keep if _n == 1

keep pc8plus dom_share

// Flag products without any domestic share (1 = missing, 0 = available)
gen no_dom_pc8plus = (dom_share == .)

// pc is the code finally assigned to each product; it starts at pc8plus
gen pc = pc8plus


// Aggregate step by step from the 7-digit level up to the 2-digit level
//----------------------------------------------------------------------------
// For each level k = 7, 6, ..., 2:
//   - pc`k' is the first k characters of pc8plus when pc8plus has length 8;
//     codes of any other length are carried over unchanged.
//   - dom_share_pc`k' is the mean of dom_share within the pc`k' group.
//   - replace_pc`k' counts the rows of the group that were flagged as having
//     no domestic share at the previous (finer) level.
//   - pc is moved up to pc`k' when that count is positive and the group itself
//     has a domestic share.

gen ll = length(pc8plus)

local finer pc8plus                                                             // Level whose no_dom_* flag is counted next
forvalues k = 7(-1)2 {

    gen pc`k' = substr(pc8plus, 1, `k') if ll == 8
    replace pc`k' = pc8plus if ll != 8

    // Manual assignment of code "7.2007" to the "33403" branch; it is applied
    // at the 5-digit level and above only. The script also carries a disabled
    // override for "7.2004" at the 7- and 6-digit levels ("2511157"/"251115");
    // it remains disabled here.
    if `k' <= 5 {
        replace pc`k' = substr("33403", 1, `k') if pc8plus == "7.2007"
    }

    bysort pc`k': egen dom_share_pc`k' = mean(dom_share)
    by pc`k': egen replace_pc`k' = total(no_dom_`finer')                        // Rows in the group still lacking a dom share one level below
    gen no_dom_pc`k' = (dom_share_pc`k' == .)
    replace pc = pc`k' if replace_pc`k' > 0 & dom_share_pc`k' != .

    local finer pc`k'
}

save estimation\1_product_list\output\A5_data_italy_temp.dta, replace


// Composite categories
//------------------------------------------------------------------------------
// A pc8plus code whose length is not 8 is treated as synthetic. A category (pc)
// is composite when at least one synthetic code was reassigned into it, i.e.
// its pc differs from its own pc8plus.

gen pc8plus_d = (ll != 8 & pc8plus != pc)                                       // Synthetic code that was aggregated
bysort pc: egen pc8plus_comp = max(pc8plus_d)                                   // 1 on every row of a composite category

// Keep only the synthetic codes that sit inside composite categories
keep if pc8plus_comp == 1 & ll != 8
keep pc pc8plus
order pc pc8plus
save estimation\1_product_list\output\A5_pc_comp_italy.dta, replace


// Summarize non-composite categories
//------------------------------------------------------------------------------
// Reload the full product list from the temporary file, rebuild the composite
// flag, drop every composite category and keep one row per remaining pc.

// -use- accepts only the options -clear- and -nolabel-. The former ", replace"
// stops the do-file with "option replace not allowed" (r(198)), so the
// non-composite list was never written. -clear- discards the data in memory,
// which is what is intended here.
use estimation\1_product_list\output\A5_data_italy_temp.dta, clear

gen pc8plus_d = (ll != 8 & pc8plus != pc)                                       // Synthetic code that was aggregated
bysort pc: egen pc8plus_comp = max(pc8plus_d)                                   // 1 on every row of a composite category

drop if pc8plus_comp == 1                                                       // Drop categories with synthetic pc8plus
bysort pc: keep if _n == 1                                                      // One row per category
keep pc
save estimation\1_product_list\output\A5_pc_italy.dta, replace

// The temporary file is no longer needed
rm estimation\1_product_list\output\A5_data_italy_temp.dta

